In [1]:
# Stretch the notebook container to the full browser width.
# `display` and `HTML` come from the public IPython.display module; importing
# `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
In [2]:
import sys
sys.path.append('.')
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt

import pandas as pd

import incense
from incense import ExperimentLoader
from cycler import cycler
#from sklearn.manifold import TSNE
#from MulticoreTSNE import MulticoreTSNE as TSNE
from tsnecuda import TSNE

import matplotlib as mpl
In [3]:
# Load the dataset archive and reduce the test label matrix to one class
# index per row (argmax over axis 1).
loaded = np.load('../../Cleaned_up/data.npz')
y_valid = loaded['test_labels_cat'].argmax(axis=1)

# Colour map and t-SNE estimator shared by the plotting cells below.
cmap = plt.cm.tab10
tsne = TSNE()
#%load_ext wurlitzer
#tsne = TSNE(n_jobs=8)
In [4]:
def plot_manifold(X_valid_2D, X_valid, x, y, n, labels=None):
    """Draw a 2-D embedding as a scatter plot with image thumbnails on top.

    Parameters
    ----------
    X_valid_2D : array of 2-D coordinates, one row per point. The 0.04
        spacing threshold below presumably assumes coordinates scaled to
        [0, 1] -- confirm with the caller.
    X_valid : indexable of images; ``X_valid[i]`` is drawn as the thumbnail
        for point ``i``.
    x, y, n : rows, columns and index passed straight to ``plt.subplot``.
    labels : optional class label per point, used for the scatter colours and
        the thumbnail borders. Defaults to the notebook-level ``y_valid``
        trimmed to the number of points, because ``y_valid`` is extended with
        extra entries later in the notebook and would otherwise no longer
        match the points being plotted.
    """
    if labels is None:
        labels = y_valid[:len(X_valid_2D)]

    plt.subplot(x, y, n)
    cmap = plt.cm.tab10
    plt.scatter(X_valid_2D[:, 0], X_valid_2D[:, 1], c=labels, s=10, cmap=cmap)

    # Positions of thumbnails drawn so far; seeded with the (1, 1) corner so
    # that the distance computation below never sees an empty array.
    image_positions = np.array([[1., 1.]])
    for index, position in enumerate(X_valid_2D):
        # Squared distance from this point to every thumbnail already placed.
        dist = np.sum((position - image_positions) ** 2, axis=1)
        if np.min(dist) > 0.04: # if far enough from other images
            image_positions = np.r_[image_positions, [position]]
            imagebox = mpl.offsetbox.AnnotationBbox(
                mpl.offsetbox.OffsetImage(X_valid[index], cmap="binary"),
                position, bboxprops={"edgecolor": cmap(labels[index]), "lw": 2})
            plt.gca().add_artist(imagebox)
    plt.axis("off")
In [5]:
# Handle on the 'sacred' database of the local MongoDB instance; used below
# to look up experiments.
loader = ExperimentLoader(mongo_uri="mongodb://localhost:27017", db_name='sacred')
In [6]:
# Names of the prediction artifacts to visualise, one figure per entry.
# Earlier runs used other subsets, e.g. only 0, only 10 and 20, or every
# tenth value from 0 to 100; edit the tuple below to switch.
name_list = ['predictions_df_{}'.format(checkpoint) for checkpoint in (0, 50, 100)]
In [7]:
# query = {"$and": [
#            {"config.autoencoder_type": "nomal_dim_tied"},
          
#        ]}
# experiments=loader.find(query)
# #experiments=experiments[4:]
In [8]:
# Run ids to consider. Ids 7, 72, 27, 36, 41, 24 and 75 were discarded and are
# left out. Ids 6 and 11 are also annotated as discarded but are still part
# of the query.
_run_ids = [3, 6, 68, 16, 11, 33, 20, 60, 65, 61, 64]

# Autoencoder variants accepted by the filter.
_autoencoder_types = [
    "Over_dim",
    "Over_dim_iteration",
    "Over_dim_tied",
    "Over_dim_tied_iteration",
    "normal_dim",
    "normal_dim_iteration",
    "normal_dim_tied",
    "normal_dim_tied_iteration",
]

# Target variants accepted by the filter ("Mnist" and "Noisy" are disabled).
_targets_types = ["10_Targets"]

# A run matches when its id is listed AND its autoencoder type is listed AND
# its targets type is listed.
query = {
    "$or": [{"_id": run_id} for run_id in _run_ids],
    "$and": [
        {"$or": [{"config.autoencoder_type": kind} for kind in _autoencoder_types]},
        {"$or": [{"config.targets_type": kind} for kind in _targets_types]},
    ],
}
        
        
    
    
    
# Fetch every experiment that matches the query built above.
experiments = loader.find(query)
In [9]:
# One row per experiment with the config fields used to title the plots
# below. Each config path is listed exactly once.
dtf = experiments.project(on=["config.targets_type", "config.autoencoder_type", "config.batch_size"])
dtf
Out[9]:
targets_type autoencoder_type batch_size
exp_id
3 10_Targets Over_dim 64
16 10_Targets normal_dim 32
33 10_Targets normal_dim_iteration 32
64 10_Targets Over_dim_tied_iteration 64
65 10_Targets Over_dim_tied_iteration 32
In [10]:
# For each class 0-9, pick the row of 'x_test_targets' at the position where
# that class first appears in y_valid.
_labels_as_list = y_valid.tolist()
_first_occurrence = [_labels_as_list.index(digit) for digit in range(10)]
aux = loaded['x_test_targets'][_first_occurrence]
In [11]:
# Test labels followed by the labels 0-9 for the ten extra rows stacked from
# `aux` in the plotting cell. Rebuilt from the raw label matrix rather than
# from the current y_valid, so re-running this cell does not keep appending
# another ten entries.
y_valid = np.append(np.argmax(loaded['test_labels_cat'], axis=1), range(10))
In [12]:
# Grid layout: one column per experiment and two rows per iteration
# (class-coloured scatter on top, the same scatter with image thumbnails
# underneath). 8*2 rows leave room for 8 iterations; an artifact with more
# iterations would produce a subplot index outside the grid.
pos_x=len(experiments)
pos_y=8*2
# Number of test points; rows after these are the target images stacked
# from `aux`.
n_samples=10000

for data in name_list:
    # Small banner figure showing which artifact the following grid belongs to.
    plt.figure(figsize=(2,0.5))
    plt.text(0.1, 0.1, data, fontsize=12)
    plt.axis("off")

    plt.show()

    plt.figure(figsize=(10*pos_x, 8*pos_y))

    # One grid column per experiment (n is the 1-based column index).
    for n,exp in enumerate(experiments,1):

        # The artifact is a gzip-compressed pickled DataFrame.
        pickle_artifact = exp.artifacts[data].as_type(incense.artifact.PickleArtifact)
        predictions=pd.read_pickle(pickle_artifact.file,compression='gzip')

        # NOTE(review): both zip arguments are the same column, so
        # compressed[0] and compressed[1] are identical -- confirm whether a
        # different column was intended for one of them.
        for counter ,compressed in enumerate(zip(predictions['predictions'],predictions['predictions'])):
            # Skip iterations containing NaN or inf. The counter still
            # advances, so the corresponding grid cells stay empty.
            if np.isnan(compressed[0]).any() or np.isinf(compressed[0]).any():
                continue

            # Embed the predictions together with the ten target rows so that
            # the targets land in the same 2-D space.
            comp=np.vstack((compressed[0],aux))

            X_valid_2D = tsne.fit_transform(comp)
            # Rescale with the global min/max over both coordinates.
            X_valid_2D = (X_valid_2D - X_valid_2D.min()) / (X_valid_2D.max() - X_valid_2D.min())
            X_valid=compressed[1].reshape(n_samples,28,28)

            # Top row of the pair: points coloured by class, targets as large crosses.
            plt.subplot(pos_y,pos_x,n+counter*pos_x*2)
            if (counter==0):
                plt.title(" ".join(str(x) for x in dtf[['autoencoder_type','batch_size','targets_type']].iloc[n-1].values))
            else :
                plt.title(" ".join(str(x) for x in ["Iteration : ",counter]))
            plt.scatter(X_valid_2D[:n_samples, 0], X_valid_2D[:n_samples, 1], c=y_valid[:n_samples], s=10, cmap=cmap)
            plt.scatter(X_valid_2D[n_samples:, 0], X_valid_2D[n_samples:, 1], marker="x",c=y_valid[n_samples:], s=400, cmap=cmap)
            plt.axis("off")

            # Bottom row of the pair: same scatter with image thumbnails.
            plt.subplot(pos_y,pos_x,n+counter*pos_x*2+pos_x)
            plt.scatter(X_valid_2D[:n_samples, 0], X_valid_2D[:n_samples, 1], c=y_valid[:n_samples], s=10, cmap=cmap)

            # Positions of thumbnails drawn so far, seeded with the (1, 1) corner.
            image_positions = np.array([[1., 1.]])
            for index, position in enumerate(X_valid_2D[:n_samples]):
                dist = np.sum((position - image_positions) ** 2, axis=1)
                if np.min(dist) > 0.04: # if far enough from other images
                    image_positions = np.r_[image_positions, [position]]
                    imagebox = mpl.offsetbox.AnnotationBbox(
                        mpl.offsetbox.OffsetImage(X_valid[index], cmap="binary"),
                        position, bboxprops={"edgecolor": cmap(y_valid[index]), "lw": 2})
                    plt.gca().add_artist(imagebox)
            plt.axis("off")

            # Drop references to the large arrays before the next iteration.
            X_valid_2D=[]
            X_valid=[]

        pickle_artifact=[]
        predictions=[]

    # Render the grid for this artifact; show must be called, not merely
    # referenced.
    plt.show()